Add HTML and Text formats. From Scott at brynen.com.
authorrobertl <robertl@f51c46e8-681c-474f-0cfe-069cfd0219fb>
Tue, 6 Apr 2004 20:58:32 +0000 (20:58 +0000)
committerrobertl <robertl@f51c46e8-681c-474f-0cfe-069cfd0219fb>
Tue, 6 Apr 2004 20:58:32 +0000 (20:58 +0000)
util.c: tweak html honkers.

gpsbabel/Makefile
gpsbabel/README
gpsbabel/defs.h
gpsbabel/html.c [new file with mode: 0644]
gpsbabel/text.c [new file with mode: 0644]
gpsbabel/util.c
gpsbabel/vecs.c

index c1d64145c6a4747d25bdcc703aa8c4d7d0c08a8c..97976c4ce7ef434f4351a62542be2343c0d90d4c 100644 (file)
@@ -19,7 +19,7 @@ FMTS=magproto.o gpx.o geo.o mapsend.o mapsource.o \
        psp.o holux.o garmin.o tmpro.o tpg.o \
        xcsv.o gcdb.o tiger.o internal_styles.o easygps.o quovadis.o \
        gpilots.o saroute.o navicache.o psitrex.o geoniche.o delgpl.o \
-       ozi.o nmea.o
+       ozi.o nmea.o text.o html.o
 
 FILTERS=position.o duplicate.o arcdist.o polygon.o smplrout.o reverse_route.o
 
index c9c48e0a44b150313aa5c25ee8aac845af1b6d00..3915fd4d1ab04c9964681f9b74cd0fed740ea427 100644 (file)
@@ -497,6 +497,19 @@ THE FORMATS
        http://www.kolumbus.fi/eino.uikkanen/geoconvgb/index.htm
        http://www.commlinx.com.au/GPS_recorder.htm
 
+    TEXT
+
+        This is a simple human-readable version of the data file, handy for
+        listing the contents of any type of waypoint file.
+
+    HTML
+
+        HTML output generates a single HTML file of all of the waypoints in
+        the input file.  It supports a number of Geocaching GPX extensions
+        and filters out potentially harmful HTML from the input file while
+        maintaining almost all of the source HTML formatting.
+
+
 DATA FILTERS
 
        GPSBabel supports data filtering.  Data filters are invoked from
index 20fd14bef35b1b9b3b790349da5b34dc766c2232..bed5562426c87bb2deea5e2b0f39e5df846bbacd 100644 (file)
@@ -413,7 +413,9 @@ signed int get_tz_offset(void);
 signed int month_lookup(const char *m);
 const char *get_cache_icon(const waypoint *waypointp);
 char * xml_entitize(const char * str);
+char * html_entitize(const char * str);
 char * strip_html(const utf_string*);
+char * strip_nastyhtml(const char * in);
 char * str_utf8_to_cp1252( const char * str );
 char * str_utf8_to_ascii( const char * str );
 
diff --git a/gpsbabel/html.c b/gpsbabel/html.c
new file mode 100644 (file)
index 0000000..8a128bb
--- /dev/null
@@ -0,0 +1,164 @@
+/*
+    Output-only format that writes waypoints as a human-readable HTML page.
+
+    Copyright (C) 2004 Scott Brynen, scott (at) brynen.com
+    Copyright (C) 2002 Robert Lipe, robertlipe@usa.net
+
+    This program is free software; you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation; either version 2 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program; if not, write to the Free Software
+    Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111 USA
+*/
+
+
+#include "defs.h"
+#include "jeeps/gpsmath.h"
+#include <ctype.h>
+
+static FILE *file_out;
+static void *mkshort_handle;
+
+static char *stylesheet = NULL;
+
+#define MYNAME "HTML"
+
+/* Options accepted by the HTML writer; the final row is all zeros. */
+static arglist_t html_args[] = {
+       {
+               "stylesheet",
+               &stylesheet,
+               "Path to HTML style sheet",
+               ARGTYPE_STRING
+       },
+       { 0, 0, 0, 0 }
+};
+
+
+
+/* Open the output file for writing and create this writer's short-name handle. */
+static void wr_init(const char *fname)
+{
+       file_out = xfopen(fname, "w", MYNAME);
+
+       mkshort_handle = mkshort_new_handle();
+}
+
+/* Close the output file and release the short-name handle made by wr_init(). */
+static void wr_deinit(void)
+{
+       fclose(file_out);
+
+       mkshort_del_handle(mkshort_handle);
+}
+
+/*
+ * Write one waypoint as a block of HTML: a rule, a named anchor, a heading
+ * holding the name, the position in degrees and decimal minutes, the UTM
+ * coordinates and (when known) the altitude, followed by either the
+ * geocache descriptions and hint or the waypoint notes.
+ */
+static void
+html_disp(const waypoint *wpt)
+{
+       int latint, lonint;
+       long utmz;
+       double utme, utmn;
+       char utmzc;
+       char *anchor, *name;
+
+       /* Whole degrees without sign; the hemisphere letter carries the sign. */
+       lonint = (int) fabs(wpt->longitude);
+       latint = (int) fabs(wpt->latitude);
+       GPS_Math_WGS84_To_UTM_EN(wpt->latitude, wpt->longitude,
+               &utme, &utmn, &utmz, &utmzc);
+
+       /* Escape names before they land in markup, as html_index() does. */
+       anchor = html_entitize(wpt->shortname);
+       name = html_entitize(global_opts.synthesize_shortnames ?
+               mkshort(mkshort_handle, wpt->description) : wpt->shortname);
+
+       fprintf(file_out, "<hr>\n");
+       fprintf(file_out, "<a name=\"%s\"></a><table width=\"100%%\"><tr><td>\n", anchor);
+       fprintf(file_out, "<h3 class=\"waypoint\">%s - %c%d&deg;%06.3f %c%d&deg;%06.3f (%ld%c %6.0f %7.0f)",
+               name,
+               wpt->latitude < 0 ? 'S' : 'N', latint, 60.0 * (fabs(wpt->latitude) - latint),
+               wpt->longitude < 0 ? 'W' : 'E', lonint, 60.0 * (fabs(wpt->longitude) - lonint),
+               utmz, utmzc, utme, utmn);
+       xfree(name);
+       xfree(anchor);
+
+       if (wpt->altitude != unknown_alt) {
+               fprintf(file_out, " alt: %1.1f", wpt->altitude);
+       }
+       fprintf(file_out, "<br>\n");
+
+       /* The description is only repeated when it adds something to the name. */
+       if (strcmp(wpt->description, wpt->shortname)) {
+               char *d = html_entitize(wpt->description);
+
+               if (wpt->url) {
+                       char *u = html_entitize(wpt->url);
+
+                       fprintf(file_out, "<a href=\"%s\">%s</a>", u, d);
+                       xfree(u);
+               }
+               else {
+                       fprintf(file_out, "%s", d);
+               }
+               xfree(d);
+       }
+       /* Close the heading on every path so it cannot swallow what follows. */
+       fprintf(file_out, "</h3>\n");
+
+       if (wpt->gc_data.terr) {
+               if (wpt->gc_data.desc_short.utfstring) {
+                       /* strip_nastyhtml() returns a fresh copy that we own. */
+                       char *safe = strip_nastyhtml(wpt->gc_data.desc_short.utfstring);
+
+                       fprintf(file_out, "<p class=\"descshort\">%s</p>\n", safe);
+                       xfree(safe);
+               }
+               if (wpt->gc_data.desc_long.utfstring) {
+                       char *safe = strip_nastyhtml(wpt->gc_data.desc_long.utfstring);
+
+                       fprintf(file_out, "<p class=\"desclong\">%s</p>\n", safe);
+                       xfree(safe);
+               }
+               if (wpt->gc_data.hint) {
+                       fprintf(file_out, "<p class=\"hint\"><strong>Hint:</strong> %s</p>\n", wpt->gc_data.hint);
+               }
+       }
+       /* notes is tested first in case an input format left it unset. */
+       else if (wpt->notes && strcmp(wpt->notes, wpt->description)) {
+               fprintf(file_out, "<p class=\"notes\">%s</p>\n", wpt->notes);
+       }
+       fprintf(file_out, "</td></tr></table>\n");
+}
+
+/* Write one index line: a link to "#<shortname>" labelled "<shortname> - <description>". */
+static void
+html_index(const waypoint *wpt)
+{
+       char *name = html_entitize(wpt->shortname);
+       char *desc = html_entitize(wpt->description);
+
+       fprintf(file_out, "<a href=\"#%s\">%s - %s</a><br>\n", name, name, desc);
+
+       xfree(desc);
+       xfree(name);
+}
+
+/*
+ * Write the whole page: document head (with the optional stylesheet link),
+ * an index paragraph with one link per waypoint, then one detail block per
+ * waypoint, and finally the closing tags.
+ */
+static void
+data_write(void)
+{
+       setshort_length(mkshort_handle, 6);
+
+       fputs("<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 4.0 Transitional//EN\">\n"
+             "<html>\n"
+             "<head>\n"
+             " <title>GPSBabel HTML Output</title>\n"
+             " <meta name=\"Generator\" content=\"GPSBabel\">\n", file_out);
+       if (stylesheet != NULL) {
+               fprintf(file_out, " <link rel=\"stylesheet\" type=\"text/css\" href=\"%s\">\n", stylesheet);
+       }
+       fputs("</head>\n"
+             "<body>\n", file_out);
+
+       /* First pass: the index. */
+       fputs("<p class=\"index\">\n", file_out);
+       waypt_disp_all(html_index);
+       fputs("</p>\n", file_out);
+
+       /* Second pass: the per-waypoint detail. */
+       waypt_disp_all(html_disp);
+
+       fputs("</body>"
+             "</html>", file_out);
+}
+
+
+/*
+ * Format vector for the HTML writer.  Only the write-side hooks defined in
+ * this file are filled in; the format is output only.
+ * NOTE(review): the NULL slots are presumably the read-side hooks -- confirm
+ * against the ff_vecs_t declaration in defs.h.
+ */
+ff_vecs_t html_vecs = {
+       ff_type_file,
+       NULL,
+       wr_init,
+       NULL,
+       wr_deinit,
+       NULL,
+       data_write,
+       html_args
+};
diff --git a/gpsbabel/text.c b/gpsbabel/text.c
new file mode 100644 (file)
index 0000000..cebd5a8
--- /dev/null
@@ -0,0 +1,133 @@
+/*
+    Output-only format that writes waypoints as a human-readable text listing.
+
+    Copyright (C) 2004 Scott Brynen, scott (at) brynen.com
+    Copyright (C) 2002 Robert Lipe, robertlipe@usa.net
+
+    This program is free software; you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation; either version 2 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program; if not, write to the Free Software
+    Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111 USA
+*/
+
+
+#include "defs.h"
+#include "jeeps/gpsmath.h"
+#include <ctype.h>
+
+static FILE *file_out;
+static void *mkshort_handle;
+
+static char *suppresssep = NULL;
+
+#define MYNAME "TEXT"
+
+/* Options accepted by the text writer; the final row is all zeros. */
+static arglist_t text_args[] = {
+       {
+               "nosep",
+               &suppresssep,
+               "Suppress separator lines between waypoints",
+               ARGTYPE_BOOL
+       },
+       { 0, 0, 0, 0 }
+};
+
+
+
+/* Open the output file for writing and create this writer's short-name handle. */
+static void wr_init(const char *fname)
+{
+       file_out = xfopen(fname, "w", MYNAME);
+
+       mkshort_handle = mkshort_new_handle();
+}
+
+/* Close the output file and release the short-name handle made by wr_init(). */
+static void wr_deinit(void)
+{
+       fclose(file_out);
+
+       mkshort_del_handle(mkshort_handle);
+}
+
+/* Print the tag-stripped form of one geocache description field. */
+static void
+text_write_stripped(const utf_string *field)
+{
+       char *stripped = strip_html(field);
+
+       fprintf(file_out, "\n%s\n", stripped);
+       /*
+        * strip_html() hands back field->utfstring itself when the field is
+        * not HTML; that buffer belongs to the waypoint and must not be freed.
+        */
+       if (stripped != field->utfstring) {
+               xfree(stripped);
+       }
+}
+
+/*
+ * Write one waypoint as plain text: a line with the name, the position in
+ * degrees and decimal minutes, the UTM coordinates and (when known) the
+ * altitude; then the description if it differs from the name; then either
+ * the geocache descriptions and hint or the notes; then a separator line
+ * (or a blank line when the "nosep" option is given).
+ */
+static void
+text_disp(const waypoint *wpt)
+{
+       int latint, lonint;
+       long utmz;
+       double utme, utmn;
+       char utmzc;
+
+       /* Whole degrees without sign; the hemisphere letter carries the sign. */
+       lonint = (int) fabs(wpt->longitude);
+       latint = (int) fabs(wpt->latitude);
+
+       GPS_Math_WGS84_To_UTM_EN(wpt->latitude, wpt->longitude,
+               &utme, &utmn, &utmz, &utmzc);
+
+       fprintf(file_out, "%-16s  %c%d %06.3f  %c%d %06.3f  (%ld%c %6.0f %7.0f)",
+               (global_opts.synthesize_shortnames) ? mkshort(mkshort_handle, wpt->description) : wpt->shortname,
+               wpt->latitude < 0 ? 'S' : 'N', latint, 60.0 * (fabs(wpt->latitude) - latint),
+               wpt->longitude < 0 ? 'W' : 'E', lonint, 60.0 * (fabs(wpt->longitude) - lonint),
+               utmz, utmzc, utme, utmn);
+       if (wpt->altitude != unknown_alt) {
+               fprintf(file_out, "  alt: %1.1f", wpt->altitude);
+       }
+       fprintf(file_out, "\n");
+
+       /* The description is only repeated when it adds something to the name. */
+       if (strcmp(wpt->description, wpt->shortname)) {
+               fprintf(file_out, "%s\n", wpt->description);
+       }
+
+       if (wpt->gc_data.terr) {
+               if (wpt->gc_data.desc_short.utfstring) {
+                       text_write_stripped(&wpt->gc_data.desc_short);
+               }
+               if (wpt->gc_data.desc_long.utfstring) {
+                       text_write_stripped(&wpt->gc_data.desc_long);
+               }
+               if (wpt->gc_data.hint) {
+                       fprintf(file_out, "\nHint: %s\n", wpt->gc_data.hint);
+               }
+       }
+       /* notes is tested first in case an input format left it unset. */
+       else if (wpt->notes && strcmp(wpt->notes, wpt->description)) {
+               fprintf(file_out, "%s\n", wpt->notes);
+       }
+
+       if (! suppresssep) {
+               fprintf(file_out, "-----------------------------------------------------------------------------\n");
+       }
+       else {
+               fprintf(file_out, "\n");
+       }
+}
+
+/*
+ * Write the listing: an opening separator line (unless "nosep" was given),
+ * then one entry per waypoint.
+ */
+static void
+data_write(void)
+{
+       if (suppresssep == NULL) {
+               fputs("-----------------------------------------------------------------------------\n", file_out);
+       }
+
+       setshort_length(mkshort_handle, 6);
+       waypt_disp_all(text_disp);
+}
+
+
+/*
+ * Format vector for the text writer.  Only the write-side hooks defined in
+ * this file are filled in; the format is output only.
+ * NOTE(review): the NULL slots are presumably the read-side hooks -- confirm
+ * against the ff_vecs_t declaration in defs.h.
+ */
+ff_vecs_t text_vecs = {
+       ff_type_file,
+       NULL,
+       wr_init,
+       NULL,
+       wr_deinit,
+       NULL,
+       data_write,
+       text_args
+};
index dfad3fb36f6685ba208c553284283d89a629c956..e16b53821e35d30070a1278f72a37b424b9303d2 100644 (file)
@@ -784,6 +784,61 @@ char * str_utf8_to_ascii( const char * str )
        return result;
 }
 
+/*
+ * Neutralize HTML in one record that would otherwise affect the rest of
+ * the generated page:
+ *   <body>             - stops backgrounds from being loaded
+ *   </body>, </html>   - would stop processing altogether
+ *   <style>, </style>  - would override styles for everything
+ * "<image" is additionally rewritten to "<img".  Matching ignores case and
+ * every rewrite overwrites bytes in place, so the length never changes.
+ *
+ * Returns a newly allocated copy of 'in'; the caller releases it with
+ * xfree().
+ */
+char *
+strip_nastyhtml(const char * in)
+{
+       static const struct {
+               const char *find;       /* lower-case text to look for */
+               const char *put;        /* written over the match, from its start */
+       } rewrites[] = {
+               { "<body",    "<----" },
+               { "</body",   "</----" },
+               { "</html",   "</----" },
+               { "<style",   "<!--   " },
+               { "</style>", "     --" },      /* the original '>' completes "-->" */
+               { "<image",   "<img  " },
+       };
+       char *returnstr = xstrdup(in);
+       char *lcstr = xstrdup(in);
+       char *lcp;
+       size_t r, k;
+
+       /*
+        * Search a lower-cased twin so matching ignores case; both copies
+        * share offsets.  The cast keeps tolower() defined for bytes above
+        * 0x7f, which are negative where plain char is signed.
+        */
+       for (lcp = lcstr; *lcp; lcp++) {
+               *lcp = tolower((unsigned char) *lcp);
+       }
+
+       for (r = 0; r < sizeof(rewrites) / sizeof(rewrites[0]); r++) {
+               while ((lcp = strstr(lcstr, rewrites[r].find)) != NULL) {
+                       char *dst = returnstr + (lcp - lcstr);
+
+                       /*
+                        * The "<style" replacement is one byte longer than its
+                        * match; stopping at the terminator keeps a string
+                        * that ends in "<style" from being overrun.
+                        */
+                       for (k = 0; rewrites[r].put[k] && dst[k]; k++) {
+                               dst[k] = rewrites[r].put[k];
+                       }
+                       *lcp = '*';         /* so we won't find it again */
+               }
+       }
+       xfree(lcstr);
+       return returnstr;
+}
+       
+       
 /*
  *  Without getting into all the complexity of technically legal HTML,
  *  this function tries to strip "ugly" parts of it to make it more 
@@ -794,54 +849,96 @@ char *
 strip_html(const utf_string *in)
 {
        char *outstring, *out;
-       int ctr;
        char *instr = in->utfstring;
-
+       char tag[8];
+       short int taglen;
+       
        if (!in->is_html)
                return in->utfstring;
        /*
         * We only shorten, so just dupe the input buf for space.
         */
-       out = outstring = xstrdup(in->utfstring);
-       outstring[0] = 0;
-
-       for(ctr=0; ; instr++) {
-               switch(*instr) {
-                       case 0: 
-                               return (out);
-
-                       case '<':
-                               if (instr[1] == 'p')
-                                       *outstring++ = '\n';
-                               ctr++;
-                               break;
-                       case '>':
-                               ctr--;
-                               break;
-                       case '\n':
-                               continue;
-                       default:
-                               if (ctr == 0) {
-                                       *outstring++ = *instr;
-                               }
+
+       outstring = out = xstrdup(in->utfstring);
+
+       tag[0] = 0;
+       while (*instr) {
+               if ((*instr == '<') || (*instr == '&')) {
+                       tag[0] = *instr;
+                       taglen = 0;
+               }
+               
+               if (! tag[0]) {
+                       if (*instr != '\n')
+                               *out++ = *instr;
                }
+               else {
+                       if (taglen < (sizeof(tag)-1)) {
+                               tag[taglen++] = tolower(*instr);
+                               tag[taglen] = 0;
+                       }
+               }
+               
+               if ( ((tag[0] == '<') && (*instr == '>')) ||
+                    ((tag[0] == '&') && (*instr == ';')) ) {
+                       if (! strcmp(tag,"&amp;"))
+                               *out++ = '&';
+                       else if (! strcmp (tag, "&lt;"))
+                               *out++ = '<';
+                       else if (! strcmp (tag, "&gt;"))
+                               *out++ = '>';
+                       else if (! strcmp (tag, "&quot;"))
+                               *out++ = '"';
+                       else if (! strcmp (tag, "&nbsp;"))
+                               *out++ = ' ';
+                       else if (! strcmp (tag, "&deg;")) {
+                               *out++ = 'd'; *out++ = 'e'; *out++ = 'g';
+                       }
+                       else if ((tag[0]=='<') && (tag[1]=='p'))
+                               *out++ = '\n';
+                       else if ((tag[0]=='<') && (tag[1]=='b') && (tag[2]=='r'))
+                               *out++ = '\n';
+                       else if ((tag[0]=='<') && (tag[1]=='/') && (tag[2]=='t') && (tag[3]=='r'))
+                               *out++ = '\n';
+                       else if ((tag[0]=='<') && (tag[1]=='/') && (tag[2]=='t') && (tag[3]=='d'))
+                               *out++ = ' ';
+                       else if ((tag[0]=='<') && (tag[1]=='i') && (tag[2]=='m') && (tag[3]=='g')) {
+                               *out++ = '['; *out++ = 'I'; *out++ = 'M'; *out++ = 'G'; *out++ = ']';
+                       }
+                       
+                     tag[0] = 0;
+               }
+               *instr++;
        }
+       *out++ = 0;
+       return (outstring);
 }
 
-char * xml_entitize(const char * str) 
+/* One literal-to-entity substitution used when entitizing text. */
+typedef struct {
+       const char * text;      /* literal text searched for in the input */
+       const char * entity;    /* entity string written in its place */
+       int  not_html;          /* nonzero: entry is skipped when entitizing for HTML */
+} entity_types;
+
+/*
+ * Standard substitutions.  Only the apostrophe is flagged not_html.
+ * The entry whose text is NULL ends the table.
+ */
+static 
+entity_types stdentities[] =  {
+       { "&",  "&amp;", 0 },
+       { "'",  "&apos;", 1 },
+       { "<",  "&lt;", 0 },
+       { ">",  "&gt;", 0 },
+       { "\"", "&quot;", 0 },
+       { NULL, NULL, 0 }
+};
+
+static 
+char * 
+entitize(const char * str, int is_html) 
 {
        int elen, ecount, nsecount;
-       const char ** ep;
+       entity_types *ep;
        const char * cp;
        char * p, * tmp, * xstr;
-       const char * stdentities[] = {
-       "&",    "&amp;",
-       "<",    "&lt;",
-       ">",    "&gt;",
-       "'",    "&apos;",
-       "\"",   "&quot;",
-       NULL,   NULL 
-       };
+
        char tmpsub[20];
        int bytes = 0;
        int value = 0;
@@ -849,14 +946,14 @@ char * xml_entitize(const char * str)
        elen = ecount = nsecount = 0;
 
        /* figure # of entity replacements and additional size. */
-       while (*ep) {
+       while (ep->text) {
                cp = str;
-               while ((cp = strstr(cp, *ep)) != NULL) {
-                       elen += strlen(*(ep + 1)) - strlen(*ep);
+               while ((cp = strstr(cp, ep->text)) != NULL) {
+                       elen += strlen(ep->entity) - strlen(ep->text);
                        ecount++;
-                       cp += strlen(*ep);
+                       cp += strlen(ep->text);
                }
-               ep += 2;
+               ep++;
        }
        
        /* figure the same for other than standard entities (i.e. anything
@@ -880,23 +977,23 @@ char * xml_entitize(const char * str)
                return (tmp);
 
         if ( ecount != 0 ) {   
-               ep = stdentities;
-
-               while (*ep) {
+               for (ep = stdentities; ep->text; ep++) {
                        p = tmp;
-                       while ((p = strstr(p, *ep)) != NULL) {
-                               elen = strlen(*(ep + 1));
+                       if (is_html && ep->not_html)  {
+                               continue;
+                       }
+                       while ((p = strstr(p, ep->text)) != NULL) {
+                               elen = strlen(ep->entity);
 
-                               xstr = xstrdup(p + strlen(*ep));
+                               xstr = xstrdup(p + strlen(ep->text));
 
-                               strcpy(p, *(ep + 1));
+                               strcpy(p, ep->entity);
                                strcpy(p + elen, xstr);
 
                                xfree(xstr);
 
                                p += elen;
                        }  
-                       ep += 2;
                }
        }
 
@@ -925,3 +1022,17 @@ char * xml_entitize(const char * str)
        }       
        return (tmp);
 }
+
+/*
+ * Public wrappers for the above; html_entitize() leaves apostrophes alone
+ * so that &apos; is never written into HTML output.
+ */
+
+/* Entitize for XML output: every standard entity is applied. */
+char *
+xml_entitize(const char * str)
+{
+       const int for_html = 0;
+
+       return entitize(str, for_html);
+}
+
+/* Entitize for HTML output: entries flagged not_html are skipped. */
+char *
+html_entitize(const char * str)
+{
+       const int for_html = 1;
+
+       return entitize(str, for_html);
+}
index 46a1fa714d51e41493bfe162163690afefa37999..c205bf5ebc9d227721d667bc6ddf5f066fc42ed2 100644 (file)
@@ -60,6 +60,8 @@ extern ff_vecs_t geoniche_vecs;
 extern ff_vecs_t gpl_vecs;
 extern ff_vecs_t ozi_vecs;
 extern ff_vecs_t nmea_vecs;
+extern ff_vecs_t text_vecs;
+extern ff_vecs_t html_vecs;
 
 static
 vecs_t vec_list[] = {
@@ -244,6 +246,18 @@ vecs_t vec_list[] = {
                "NMEA 0183 sentences",
                NULL
        },
+       {
+               &text_vecs,
+               "text",
+               "Textual Output",
+               NULL
+       },
+       {
+               &html_vecs,
+               "html",
+               "HTML Output",
+               NULL
+       },
        {
                NULL,
                NULL,